Python Standard Library Analysis¶

InĀ [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from analysis_utils import add_complexity_to_metadata, prepare_std_lib_treemap_data, create_std_lib_treemap, plot_usage_in_files, plot_usage_within_files, plot_mean_complexity
InĀ [2]:
# Read the per-chunk metadata, drop rows containing missing values, and pass the
# result through the analysis_utils helper (the displayed frame has a
# `complexity` column).
metadata = add_complexity_to_metadata(
    pd.read_parquet('/workspaces/repos/randomstats/github/metadata.parquet').dropna()
)
metadata
Out[2]:
calls assignments attributes size is_ipynb complexity
chunk_id
4444d3cdff850ebe6ce372c7b867141a2d041f11 31.0 18.0 24.0 2176.0 False 0.172107
92eea3f82b5e926c9a8d6568a2b3eb351199bf9e 3.0 2.0 4.0 590.0 False 0.024374
d8108f44778865705e71acaf17442a499afbc5d0 0.0 1.0 0.0 2278.0 False 0.031695
e66ae67333542402587a359a3a84554c27444aad 6.0 6.0 4.0 886.0 False 0.046665
1d6cb5c0c1a8f8d441bb85db936dd202ae3dc58f 6.0 4.0 13.0 876.0 False 0.051324
... ... ... ... ... ... ...
79e8c8fcb9d1293b01a25327c5193a3ec2149dd7 8.0 5.0 5.0 540.0 False 0.043811
55d9448fc5f4015cf76f7246de39ce8424870ee0 0.0 0.0 0.0 3984.0 True 0.049601
8bd51a5a60142e8e839697d0ead4cdaa2a16d64c 39.0 14.0 79.0 3727.0 False 0.262118
791f8251863b3a09b1cf8f4d16e56933b0830e39 35.0 26.0 24.0 6548.0 False 0.260235
5c2402c51df4f30da18b00dcbfd8d1899348d48a 23.0 6.0 18.0 2633.0 False 0.116104

7484750 rows × 6 columns

InĀ [3]:
# Long-format usage table; the displayed columns are chunk_id, library_name,
# component_type, component and count.
libraries = pd.read_parquet(
    '/workspaces/repos/randomstats/github/library_counts.parquet'
)
libraries
Out[3]:
chunk_id library_name component_type component count
0 284c4fa678838e17f66eed0db60ea67353fad38d math from_import_function ceil 2
1 284c4fa678838e17f66eed0db60ea67353fad38d warnings function warn 1
2 89c3b09cdaa74e55dc6241088e690a7cbf0dfe10 ctypes class c_byte 2
3 89c3b09cdaa74e55dc6241088e690a7cbf0dfe10 ctypes class CDLL 2
4 89c3b09cdaa74e55dc6241088e690a7cbf0dfe10 ctypes class c_void_p 47
... ... ... ... ... ...
125612 738e07780b1324c00aba152966305aaa0cc53ca4 os attribute path 1
125613 738e07780b1324c00aba152966305aaa0cc53ca4 pdb from_import_function pm 1
125614 641836419114f68ecc105425ade25cd7813979a4 os attribute path 7
125615 641836419114f68ecc105425ade25cd7813979a4 site function addsitedir 1
125616 0cd9ae3f7372a72618bf2966d1b8c75c618feb1d math function atan2 2

18056061 rows × 5 columns

InĀ [4]:
# Chunk ids (the metadata index) split by the `is_ipynb` flag.
py_ids = metadata.index[(metadata['is_ipynb'] == False).to_numpy()]
ipynb_ids = metadata.index[(metadata['is_ipynb'] == True).to_numpy()]

# Usage rows restricted to chunks from each of the two groups.
libraries_py = libraries.loc[libraries['chunk_id'].isin(py_ids)]
libraries_ipynb = libraries.loc[libraries['chunk_id'].isin(ipynb_ids)]

Most Commonly Used Libraries and Their Components¶

By Number of Files They Were Used in¶

Top Libraries¶

InĀ [5]:
# Top-30 ranking across all libraries (helper imported from analysis_utils).
plot_usage_in_files(
    libraries,
    top_n=30,
)
No description has been provided for this image

Top os Components¶

InĀ [6]:
# Top-30 ranking restricted to the `os` library.
plot_usage_in_files(
    libraries,
    library_name='os',
    top_n=30,
)
No description has been provided for this image

Top logging Components¶

InĀ [7]:
# Top-30 ranking restricted to the `logging` library.
plot_usage_in_files(
    libraries,
    library_name='logging',
    top_n=30,
)
No description has been provided for this image

Top re Components¶

InĀ [8]:
# Top-30 ranking restricted to the `re` library.
plot_usage_in_files(
    libraries,
    library_name='re',
    top_n=30,
)
No description has been provided for this image

Top re Functions¶

InĀ [9]:
# Limit the `re` ranking to the function/method component types, including
# their `from_import_` variants.
re_function_like_types = ['function', 'from_import_function', 'method', 'from_import_method']
plot_usage_in_files(
    libraries,
    library_name='re',
    top_n=18,
    component_types=re_function_like_types,
)
No description has been provided for this image

Top datetime Components¶

InĀ [10]:
# Top-30 ranking restricted to the `datetime` library.
plot_usage_in_files(
    libraries,
    library_name='datetime',
    top_n=30,
)
No description has been provided for this image

Top time Components¶

InĀ [11]:
# Top-30 ranking restricted to the `time` library.
plot_usage_in_files(
    libraries,
    library_name='time',
    top_n=30,
)
No description has been provided for this image

Top unittest Components¶

InĀ [12]:
# Top-30 ranking restricted to the `unittest` library.
plot_usage_in_files(
    libraries,
    library_name='unittest',
    top_n=30,
)
No description has been provided for this image

Top json Components¶

InĀ [13]:
# Top-30 ranking restricted to the `json` library.
plot_usage_in_files(
    libraries,
    library_name='json',
    top_n=30,
)
No description has been provided for this image

Top json Functions¶

InĀ [14]:
# Limit the `json` ranking to the function/method component types, including
# their `from_import_` variants.
json_function_like_types = ['function', 'from_import_function', 'method', 'from_import_method']
plot_usage_in_files(
    libraries,
    library_name='json',
    top_n=30,
    component_types=json_function_like_types,
)
No description has been provided for this image

By Number of Their Components Usage¶

InĀ [15]:
# Summed usage count for every (library, component type) pair.
df = libraries.loc[:, ['library_name', 'component_type', 'component', 'count']]
df_grouped = (
    df.groupby(['library_name', 'component_type'])['count']
    .sum()
    .reset_index()
)

# Wide layout: one row per library, one integer column per component type;
# pairs that never occur become 0.
df_pivot = (
    df_grouped
    .pivot(index='library_name', columns='component_type', values='count')
    .fillna(0)
    .astype(int)
)
df_pivot['all_components'] = df_pivot.sum(axis=1)

# Column order: grand total first, then the plain types, then the
# `from_import_*` types, each group sorted alphabetically.
columns_with_from_import = [name for name in df_pivot.columns if 'from_import' in name]
columns_without_from_import = [
    name
    for name in df_pivot.columns
    if name != 'all_components' and 'from_import' not in name
]
sorted_columns = [
    'all_components',
    *sorted(columns_without_from_import),
    *sorted(columns_with_from_import),
]
df_pivot = df_pivot.reindex(columns=sorted_columns)
df_sorted = df_pivot.sort_values(by='all_components', ascending=False)
InĀ [16]:
# First 50 rows of the table sorted by total component usage.
df_sorted.iloc[:50]
Out[16]:
component_type all_components attribute class exception function method from_import_attribute from_import_class from_import_exception from_import_function from_import_method
library_name
os 10336370 6990569 2910 4667 1551778 1572969 139821 447 80 69547 3582
unittest 9175504 112813 507738 7999 229315 8200405 132 88268 3659 24960 215
re 8514570 3209901 70 5995 1349108 3920251 601 9 67 17535 11033
logging 6805991 5879 60291 0 1062875 5634894 3 4243 0 26282 11524
datetime 4506318 438891 754423 0 0 1852843 20778 1192344 0 0 247039
json 1688985 391284 9716 1981 907038 354738 30 2140 879 21151 28
threading 1530820 183843 176859 190 10857 1081341 288 75067 13 1580 782
collections 1484301 90049 82680 0 30907 775558 672 406161 0 97624 650
ctypes 1287061 130028 238717 352 94672 4433 34262 526003 576 257480 538
argparse 1240415 0 159320 0 0 1049107 0 31927 0 0 61
time 1217957 0 1838 0 981814 0 0 529 0 233776 0
typing 1113545 3612 50917 0 2717 0 4084 1013348 0 38867 0
subprocess 1030862 413562 116502 32557 183976 177905 7709 35638 15512 47275 226
struct 972777 134911 10127 13573 319153 390757 1654 2846 992 53751 45013
io 923506 195677 88466 1699 27977 484731 286 112937 725 10803 205
socket 885323 47904 94483 55122 69795 596876 1163 8739 3581 6420 1240
os.path 856853 0 0 0 503060 0 0 0 0 353793 0
xml.etree.ElementTree 798467 163643 42717 0 141133 431419 490 6558 0 11212 1295
math 628739 0 0 0 394531 0 0 0 0 234208 0
traceback 616836 89487 420 0 122110 396352 29 61 0 8119 258
sys 557172 0 0 0 542371 0 0 0 0 14801 0
random 496583 0 11689 0 365880 0 0 3813 0 115201 0
multiprocessing 470704 32494 18270 214 13825 364191 36 20483 337 10308 10546
codecs 440154 115979 6669 0 52704 248614 1237 93 0 13666 1192
inspect 438718 277186 6270 0 99258 33848 2747 1362 0 18044 3
types 438023 48545 10221 0 981 363591 425 10301 0 320 3639
functools 414767 82120 507 0 108749 0 398 757 0 222236 0
uuid 412839 166037 24489 0 172427 0 0 14657 0 35229 0
sqlite3 362217 16878 5074 8875 26351 303466 28 219 857 457 12
unittest.mock 345158 16259 8686 0 13513 6966 44 110626 0 189064 0
zipfile 341482 99540 30376 1530 2479 196918 298 8851 1030 386 74
copy 328723 0 0 109 198820 0 0 0 29 129765 0
hashlib 318275 110591 0 0 4466 202909 2 0 0 300 7
warnings 278173 0 43632 0 200634 0 0 6801 0 27106 0
itertools 265484 0 0 0 104289 7868 0 0 0 153327 0
shutil 264450 7 0 1660 226427 0 0 0 353 36003 0
abc 253434 0 27018 0 91771 11499 0 39307 0 83837 2
pickle 253033 1081 1420 1858 93401 149152 3 486 641 3379 1612
decimal 234633 0 29287 0 674 48346 0 150059 0 1616 4651
pprint 217857 0 2667 0 23172 78436 0 531 0 57152 55899
string 217458 9929 11417 0 857 182018 26 12797 0 308 106
tempfile 212538 0 12021 0 153519 0 0 3744 0 43254 0
pathlib 211430 29568 21836 0 0 60770 61 99052 0 0 143
configparser 211065 30 11853 4129 0 185958 0 7076 2005 0 14
ast 205507 12927 134567 0 23530 19099 494 9345 0 5379 166
tarfile 192497 46540 3712 1512 17382 122712 0 549 58 30 2
operator 188231 0 0 0 100192 0 0 0 0 88039 0
urllib.request 162729 61372 7874 0 18085 54859 598 6150 0 13630 161
optparse 148828 16948 15624 0 0 88688 857 26709 0 0 2
csv 137084 14689 19792 1732 49770 47751 84 1973 106 1171 16
InĀ [17]:
# Component-type columns of `df_sorted` handed to the plotting helper.
plotted_component_types = ['class', 'function', 'method', 'attribute', 'exception']
plot_usage_within_files(
    df_sorted,
    plotted_component_types,
    top_n=30,
    number_format='M',
)
No description has been provided for this image
InĀ [18]:
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

def create_std_lib_treemap(component_usage, title, width=1200, height=1200, text_size=18):
    """
    Display a two-level (library -> component) Plotly treemap of usage counts.

    Note: this notebook-level definition shadows the function of the same name
    imported from ``analysis_utils`` in the notebook's first cell.

    Args:
        component_usage (pd.DataFrame): Prepared treemap data. The columns read
            here are 'library_name', 'component', 'count',
            'library_percentage' and 'total_percentage'.
        title (str): Title rendered centred above the treemap.
        width (int): Figure width forwarded to the layout. Defaults to 1200.
        height (int): Figure height forwarded to the layout. Defaults to 1200.
        text_size (int): Minimum size for the uniform tile text. Defaults to 18.

    Returns:
        None: The figure is shown via ``fig.show()``; nothing is returned.
    """
    # customdata indices below follow the order of `hover_columns`.
    hover_columns = ['count', 'library_percentage', 'total_percentage']
    hover_text = "%{label}<br>Count: %{customdata[0]}<br>Share of library: %{customdata[1]:.2f}%<br>Share of all: %{customdata[2]:.2f}%"
    axis_labels = {
        'library_name': 'Library',
        'component': 'Component',
        'count': 'Count',
    }

    treemap = px.treemap(
        component_usage,
        path=['library_name', 'component'],
        values='count',
        color='library_percentage',
        custom_data=hover_columns,
        title=title,
        color_continuous_scale='RdBu',
        labels=axis_labels,
    )

    treemap.update_traces(
        hovertemplate=hover_text,
        textinfo='label+value+percent parent',
    )

    # Centre a large title near the top edge and keep the side margins tight.
    title_settings = {
        'text': title,
        'font': {'size': 30},
        'x': 0.5,
        'y': 0.98,
        'xanchor': 'center',
        'yanchor': 'top',
    }
    treemap.update_layout(
        width=width,
        height=height,
        uniformtext={'minsize': text_size, 'mode': 'show'},
        title=title_settings,
        margin={'l': 10, 'r': 10, 't': 100, 'b': 10},
    )
    treemap.show()

Components in All Files¶

InĀ [19]:
# Treemap over every chunk (both .py and .ipynb rows of `libraries`).
component_usage_sum = prepare_std_lib_treemap_data(
    libraries,
    threshold=0.003,
)
create_std_lib_treemap(
    component_usage_sum,
    "Python Standard Library Modules Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in .py Files¶

InĀ [20]:
# Treemap limited to rows from .py chunks; the helper's default threshold applies.
component_usage_sum = prepare_std_lib_treemap_data(libraries_py)
create_std_lib_treemap(
    component_usage_sum,
    "Python Standard Library Components Usage in .py Files",
    width=1800,
    height=1400,
    text_size=10,
)

Components in .ipynb Files¶

InĀ [21]:
# Treemap limited to rows from .ipynb chunks; the helper's default threshold applies.
component_usage_sum = prepare_std_lib_treemap_data(libraries_ipynb)
create_std_lib_treemap(
    component_usage_sum,
    "Python Standard Library Components Usage in .ipynb Files",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with os¶

InĀ [22]:
# `os` rows from .py chunks, drawn as a treemap.
os_module = libraries_py.loc[libraries_py['library_name'] == 'os']
component_usage_sum = prepare_std_lib_treemap_data(os_module, threshold=0.001)
create_std_lib_treemap(
    component_usage_sum,
    "os Component Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with unittest¶

InĀ [23]:
# `unittest` rows from .py chunks, drawn as a treemap.
unittest_module = libraries_py.loc[libraries_py['library_name'] == 'unittest']
component_usage_sum = prepare_std_lib_treemap_data(unittest_module, threshold=0.001)
create_std_lib_treemap(
    component_usage_sum,
    "unittest Component Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with re¶

InĀ [24]:
# `re` rows from .py chunks, drawn as a treemap.
re_module = libraries_py.loc[libraries_py['library_name'] == 're']
component_usage_sum = prepare_std_lib_treemap_data(re_module, threshold=0.001)
create_std_lib_treemap(
    component_usage_sum,
    "re Component Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with logging¶

InĀ [25]:
# `logging` rows from .py chunks, drawn as a treemap.
logging_module = libraries_py.loc[libraries_py['library_name'] == 'logging']
component_usage_sum = prepare_std_lib_treemap_data(logging_module, threshold=0.001)
create_std_lib_treemap(
    component_usage_sum,
    "logging Component Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with datetime¶

InĀ [26]:
# `datetime` rows from .py chunks, drawn as a treemap.
datetime_module = libraries_py.loc[libraries_py['library_name'] == 'datetime']
component_usage_sum = prepare_std_lib_treemap_data(datetime_module, threshold=0.001)
create_std_lib_treemap(
    component_usage_sum,
    "datetime Component Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with json¶

InĀ [27]:
# `json` rows from .py chunks, drawn as a treemap.
json_module = libraries_py.loc[libraries_py['library_name'] == 'json']
component_usage_sum = prepare_std_lib_treemap_data(json_module, threshold=0.001)
create_std_lib_treemap(
    component_usage_sum,
    "json Component Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with threading¶

InĀ [28]:
# `threading` rows from .py chunks, drawn as a treemap.
threading_module = libraries_py.loc[libraries_py['library_name'] == 'threading']
component_usage_sum = prepare_std_lib_treemap_data(threading_module, threshold=0.001)
create_std_lib_treemap(
    component_usage_sum,
    "threading Component Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with collections¶

InĀ [29]:
# `collections` rows from .py chunks, drawn as a treemap.
collections_module = libraries_py.loc[libraries_py['library_name'] == 'collections']
component_usage_sum = prepare_std_lib_treemap_data(collections_module, threshold=0.001)
create_std_lib_treemap(
    component_usage_sum,
    "collections Component Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Classes in Files with collections¶

InĀ [30]:
# Only the class component types (plain and `from_import_` variants) are passed
# to the data-preparation helper for this treemap.
component_types = ('from_import_class', 'class')
collections_module = libraries_py.loc[libraries_py['library_name'] == 'collections']
component_usage_sum = prepare_std_lib_treemap_data(
    collections_module,
    component_types,
    threshold=0.001,
)
create_std_lib_treemap(
    component_usage_sum,
    "collections Class Usage",
    width=1800,
    height=1400,
    text_size=10,
)

Components in Files with ctypes (blogpost version)¶

InĀ [31]:
# `ctypes` rows from .py chunks; the treemap uses the function's default
# width, height and text size.
ctypes_module = libraries_py.loc[libraries_py['library_name'] == 'ctypes']
component_usage_sum = prepare_std_lib_treemap_data(
    ctypes_module,
    threshold=0.01,
)
create_std_lib_treemap(
    component_usage_sum,
    "ctypes Component Usage",
)

Components in Files with shutil (blogpost version)¶

InĀ [32]:
# Keep the `shutil` subset under its own name so that `ctypes_module` (set in
# the ctypes cell) is not overwritten with data from a different library.
shutil_module = libraries_py[libraries_py['library_name'] == 'shutil']
component_usage_sum = prepare_std_lib_treemap_data(shutil_module, threshold=0.01)
# Default width/height/text size of create_std_lib_treemap are used here.
create_std_lib_treemap(component_usage_sum, "shutil Component Usage")

Components in Files with os (blogpost version)¶

InĀ [33]:
# Store the `os` subset in `os_module` (same filter and name as the earlier os
# treemap cell) instead of rebinding the unrelated `ctypes_module`.
os_module = libraries_py[libraries_py['library_name'] == 'os']
component_usage_sum = prepare_std_lib_treemap_data(os_module, threshold=0.01)
# Default width/height/text size of create_std_lib_treemap are used here.
create_std_lib_treemap(component_usage_sum, "os Component Usage")

Components in Files with unittest (blogpost version)¶

InĀ [34]:
# Store the `unittest` subset in `unittest_module` (same filter and name as the
# earlier unittest treemap cell) instead of rebinding the unrelated
# `ctypes_module`.
unittest_module = libraries_py[libraries_py['library_name'] == 'unittest']
component_usage_sum = prepare_std_lib_treemap_data(unittest_module, threshold=0.01)
# Default width/height/text size of create_std_lib_treemap are used here.
create_std_lib_treemap(component_usage_sum, "unittest Component Usage")

Library Co-Occurrence¶

By Number of Library Components Used¶

InĀ [35]:
# Total component-usage count per (chunk, library) pair.
chunk_library_totals = libraries.groupby(['chunk_id', 'library_name'])['count'].sum()
libraries_counts = chunk_library_totals.reset_index()

# Reshape to one row per chunk and one column per library. The pairs are
# already unique after the groupby, so unstacking the summed series yields the
# same table as a pivot without applying a second (mean) aggregation, and the
# counts keep the dtype produced by the sum. Missing pairs are filled with 0.
libraries_counts_pivot = chunk_library_totals.unstack(fill_value=0)
libraries_counts_pivot
Out[35]:
library_name _thread abc aifc argparse array ast asynchat asyncore atexit audioop ... xml.sax.handler xml.sax.saxutils xml.sax.xmlreader xmlrpc.client xmlrpc.server zipapp zipfile zipimport zlib zoneinfo
chunk_id
0000081255acf04f13c1c84f4e86f7410a5bd792 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
00000eec027d380439a62cf403242855f96f3867 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0000136d1c63e78b3ab59641a7146a60da2c919e 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0000138611e4779ad8f368689b4c28f7a6a6ee1a 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
000014207524492ac0b6c407d99d481d00e3dc3c 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ffffe8b423a586793ee52aa7099e20f8a0758e7b 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
ffffeb99a75ba02cc350f3fb587cf05aaf8543fa 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
ffffebdb4ab741c9f40872a832bac4d66d879522 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
fffff44a72b9d75ae9d62414d15f2e3d61c4b7c5 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
fffff95467ad0cd33bdb7f041a99a44ec3649538 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

3107215 rows × 221 columns

InĀ [36]:
# Each library's share of the summed component counts over all chunks.
component_totals_per_library = libraries_counts_pivot.sum()
column_percents_by_componentcount = component_totals_per_library / component_totals_per_library.sum()

# NOTE: the "above_1_prc" names are kept for the cells below, but the cut-off
# actually applied to this first subset is 0.0063.
keep_above_1_prc = column_percents_by_componentcount > 0.0063  # more like 0.6%
libraries_counts_pivot_above_1_prc_by_componentcount = libraries_counts_pivot.loc[:, keep_above_1_prc]
correlation_matrix_above_1_prc_by_componentcount = (
    libraries_counts_pivot_above_1_prc_by_componentcount.corr()
)

# Wider subset: libraries whose share exceeds 0.001.
keep_above_01_prc = column_percents_by_componentcount > 0.001
libraries_counts_pivot_above_01_prc_by_componentcount = libraries_counts_pivot.loc[:, keep_above_01_prc]
correlation_matrix_above_01_prc_by_componentcount = (
    libraries_counts_pivot_above_01_prc_by_componentcount.corr()
)
InĀ [37]:
# Mask the upper triangle (diagonal included) so each pair is drawn once.
mask = np.triu(np.ones_like(correlation_matrix_above_1_prc_by_componentcount, dtype=bool))

fig, ax = plt.subplots(figsize=(18, 18))
sns.heatmap(
    correlation_matrix_above_1_prc_by_componentcount,
    mask=mask,
    annot=True,
    annot_kws={'size': 12},
    cmap='coolwarm',
    linewidths=0.5,
    fmt='.2f',
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title(r"Correlation Matrix by Component Count")
ax.set_ylabel("Library Name")
ax.set_xlabel("Library Name")
plt.setp(ax.get_xticklabels(), rotation=90, fontsize=14)
plt.setp(ax.get_yticklabels(), rotation=0, fontsize=14)
plt.show()
No description has been provided for this image
InĀ [38]:
# Mask the upper triangle (diagonal included) so each pair is drawn once.
mask = np.triu(np.ones_like(correlation_matrix_above_01_prc_by_componentcount, dtype=bool))

fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(
    correlation_matrix_above_01_prc_by_componentcount,
    mask=mask,
    annot=True,
    annot_kws={'size': 8},
    cmap='coolwarm',
    linewidths=0.5,
    fmt='.1f',
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title(r"Correlation Matrix by Component Count")
ax.set_ylabel("Library Name")
ax.set_xlabel("Library Name")
plt.setp(ax.get_xticklabels(), rotation=90, fontsize=14)
plt.setp(ax.get_yticklabels(), rotation=0, fontsize=14)
plt.show()
No description has been provided for this image

By Number of Files in Which Libraries Were Used¶

InĀ [39]:
# One row per distinct (chunk, library) pair.
libraries_small = libraries[['chunk_id', 'library_name']].drop_duplicates()

# Count rows per pair and spread libraries into columns (absent pairs -> 0).
# groupby().size() does not depend on a value column, unlike a pivot_table
# with aggfunc=len on a frame that has only the two key columns.
libraries_pivot = (
    libraries_small
    .groupby(['chunk_id', 'library_name'])
    .size()
    .unstack(fill_value=0)
)

# Presence indicator (1 if the library occurs in the chunk, else 0), computed
# with a vectorised comparison rather than a per-cell Python lambda through
# the deprecated DataFrame.applymap.
libraries_binary = libraries_pivot.gt(0).astype(int)
libraries_binary
Out[39]:
library_name _thread abc aifc argparse array ast asynchat asyncore atexit audioop ... xml.sax.handler xml.sax.saxutils xml.sax.xmlreader xmlrpc.client xmlrpc.server zipapp zipfile zipimport zlib zoneinfo
chunk_id
0000081255acf04f13c1c84f4e86f7410a5bd792 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
00000eec027d380439a62cf403242855f96f3867 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0000136d1c63e78b3ab59641a7146a60da2c919e 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0000138611e4779ad8f368689b4c28f7a6a6ee1a 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
000014207524492ac0b6c407d99d481d00e3dc3c 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ffffe8b423a586793ee52aa7099e20f8a0758e7b 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
ffffeb99a75ba02cc350f3fb587cf05aaf8543fa 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
ffffebdb4ab741c9f40872a832bac4d66d879522 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
fffff44a72b9d75ae9d62414d15f2e3d61c4b7c5 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
fffff95467ad0cd33bdb7f041a99a44ec3649538 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

3107215 rows × 221 columns

InĀ [40]:
# Number of chunks using each library, divided by the total number of
# (chunk, library) occurrences (i.e. the sum over the whole indicator table,
# not the number of chunks).
files_per_library = libraries_binary.sum()
column_percents_by_filecount = files_per_library / files_per_library.sum()

# Libraries whose share exceeds 0.01.
keep_above_1_prc = column_percents_by_filecount > 0.01
libraries_binary_above_1_prc_by_filecount = libraries_binary.loc[:, keep_above_1_prc]
correlation_matrix_above_1_prc_by_filecount = libraries_binary_above_1_prc_by_filecount.corr()

# Libraries whose share exceeds 0.001.
keep_above_01_prc = column_percents_by_filecount > 0.001
libraries_binary_above_01_prc_by_filecount = libraries_binary.loc[:, keep_above_01_prc]
correlation_matrix_above_01_prc_by_filecount = libraries_binary_above_01_prc_by_filecount.corr()
InĀ [41]:
# Mask the upper triangle (diagonal included) so each pair is drawn once.
mask = np.triu(np.ones_like(correlation_matrix_above_1_prc_by_filecount, dtype=bool))

fig, ax = plt.subplots(figsize=(18, 18))
sns.heatmap(
    correlation_matrix_above_1_prc_by_filecount,
    mask=mask,
    annot=True,
    annot_kws={'size': 12},
    cmap='coolwarm',
    linewidths=0.5,
    fmt='.2f',
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title(r"Correlation Matrix by File Count")
ax.set_ylabel("Library Name")
ax.set_xlabel("Library Name")
plt.setp(ax.get_xticklabels(), rotation=90, fontsize=14)
plt.setp(ax.get_yticklabels(), rotation=0, fontsize=14)
plt.show()
No description has been provided for this image
InĀ [42]:
# Mask the upper triangle (diagonal included) so each pair is drawn once.
mask = np.triu(np.ones_like(correlation_matrix_above_01_prc_by_filecount, dtype=bool))

fig, ax = plt.subplots(figsize=(22, 22))
sns.heatmap(
    correlation_matrix_above_01_prc_by_filecount,
    mask=mask,
    annot=True,
    annot_kws={'size': 8},
    cmap='coolwarm',
    linewidths=0.5,
    fmt='.1f',
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title(r"Correlation Matrix by File Count")
ax.set_ylabel("Library Name")
ax.set_xlabel("Library Name")
plt.setp(ax.get_xticklabels(), rotation=90, fontsize=14)
plt.setp(ax.get_yticklabels(), rotation=0, fontsize=14)
plt.show()
No description has been provided for this image

Mean Complexity of Code Files by Library Used in Them¶

InĀ [6]:
# NOTE(review): the third positional argument (10000) is presumably a cut-off
# on how many files must use a library — confirm against analysis_utils.
plot_mean_complexity(
    libraries_py,
    metadata,
    10000,
    'Mean Complexity of .py Files by Library Used in Them',
)
No description has been provided for this image
InĀ [5]:
# Same plot with the third positional argument lowered to 2000 and a wider figure.
# NOTE(review): that argument is presumably a cut-off on how many files must
# use a library — confirm against analysis_utils.
plot_mean_complexity(
    libraries_py,
    metadata,
    2000,
    'Mean Complexity of .py Files by Library Used in Them',
    figsize=(30, 10),
)
No description has been provided for this image
InĀ [6]:
# Export this notebook (std_library_analysis.ipynb) to HTML with nbconvert.
!jupyter nbconvert --to html std_library_analysis.ipynb
[NbConvertApp] Converting notebook std_library_analysis.ipynb to html
[NbConvertApp] WARNING | Alternative text is missing on 17 image(s).
[NbConvertApp] Writing 7778604 bytes to std_library_analysis.html